import scrapy
from laojiumen.items import LaojiumenItem
import requests
import re


class LaojiumenSpiderSpider(scrapy.Spider):
    """Collect direct ``.mp4`` links for episodes hosted on www.nkdyw.com.

    Starting from a single episode page, every visited page yields one
    ``LaojiumenItem`` with the episode label (``EP``) and the video address
    (``url``) found inside the embedded player page. The crawl then moves
    on to the page linked from the ninth ``<li>`` of the
    ``myui-player__operate`` list, which this spider treats as the
    "next episode" link.
    """

    name = 'laojiumen_spider'
    allowed_domains = ['www.nkdyw.com']
    start_urls = ['https://www.nkdyw.com/vodplay/194507-1-17/']

    # Seconds to wait for the player page. ``requests`` blocks the Scrapy
    # event loop while it runs, so an unbounded wait would stall the crawl.
    _PLAYER_TIMEOUT = 10

    # First "http....mp4" token that stays inside one URL: the match may not
    # cross whitespace, quotes or angle brackets, so it cannot start at an
    # unrelated earlier link on the same line.
    _VIDEO_URL_RE = re.compile(r"""http[^\s"'<>]*?\.mp4""")

    def parse(self, response):
        """Yield the episode item (when a video link is found) and the
        request for the next episode page.

        A page whose video link cannot be resolved is logged and skipped;
        the next-episode link is still followed so that one bad page does
        not end the crawl.
        """
        # Address of the embedded player page that contains the video link.
        player_src = response.xpath(
            "//div[@class='MacPlayer embed-responsive']//td/iframe/@src"
        ).extract_first()

        if player_src:
            # urljoin leaves absolute URLs untouched and resolves relative
            # or protocol-relative ones against the current page.
            video_url = self._fetch_video_url(response.urljoin(player_src))
        else:
            self.logger.warning("No player iframe found on %s", response.url)
            video_url = None

        if video_url:
            laojiumen_item = LaojiumenItem()
            # Episode label taken from the original page.
            laojiumen_item['EP'] = response.xpath(
                "//small[@class='text-muted']/text()").extract_first()
            laojiumen_item['url'] = video_url
            yield laojiumen_item

        next_link = response.xpath(
            "//ul[@class='myui-player__operate']/li[9]/a/@href"
        ).extract_first()
        if next_link:
            yield scrapy.Request(response.urljoin(next_link),
                                 callback=self.parse)

    def _fetch_video_url(self, player_url):
        """Download the player page and return the ``.mp4`` URL it contains.

        Returns ``None`` (after logging a warning) when the page cannot be
        fetched or holds no matching link.
        """
        try:
            player_page = requests.get(player_url,
                                       timeout=self._PLAYER_TIMEOUT)
        except requests.RequestException as exc:
            self.logger.warning("Could not fetch player page %s: %s",
                                player_url, exc)
            return None

        # Pick the video link out of the player page with the regex.
        match = self._VIDEO_URL_RE.search(player_page.text)
        if match is None:
            self.logger.warning("No .mp4 link found in player page %s",
                                player_url)
            return None
        return match.group()
